# compare the ideal probability vector obtained from perturbations,
# and the probability of signalling intermediates to act on interface TFs,
# through Jensen-Shannon divergence.
# Save JSD values and selected candidates.

args=commandArgs(TRUE)
cutoff=args[1]
best=args[2]

source("../scripts/Jensen-Shannon-Divergence.r")

# read network
library(igraph)
graph_str = readRDS("../data/curated_graph.Rdata")

# names of the two signalling probability methods to combine
approaches = c(	
	expweights = "intermediateToTFsP-length-cutoff%s.txt",
	correlation = "intermediateToTFsP-correlation-cutoff%s.txt"
)

# read the idealP (from interface TFs frequency in the best perturbations)
idealP=read.table(sprintf("interfaceTFsP-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",quote="")
tmp=idealP[,1]
idealP=idealP[,2]
names(idealP)=tmp

# save JSDs obtained with different methods
results = list()
for (i in seq_along(approaches)) {
	app = approaches[i]
	# read signalling probability
	calcPs = read.table(sprintf(app,cutoff),sep="\t",quote="",check.names=FALSE,row.names=NULL)
	calcPs = calcPs[!duplicated(calcPs$row.names),]
	rownames(calcPs) = calcPs[,1]
	calcPs = calcPs[,-1]

	#exchange P activation and inhibition for same TF: P of having the opposite effect by inhibiting the intermediate
	act = calcPs[1:(nrow(calcPs)/2),] #first half is TF activation
	inh = calcPs[(nrow(calcPs)/2+1):nrow(calcPs),] #second half is TF inhibition
	inh_calcPs = rbind(inh,act) #exchange
	rownames(inh_calcPs) = rownames(calcPs)
	colnames(inh_calcPs) = paste(colnames(inh_calcPs),"inh",sep="__")
	# join activation and inhibition of intermediates together
	X = cbind(calcPs,inh_calcPs)

	# calculate Jensen-Shannon divergence between perturbation and signalling probability vectors
	JSD = apply(X[names(idealP),],2,function(x) JSDivergence(idealP,x))
	results[[names(approaches)[i]]] = JSD
}

#transform in table
ints = unique(unlist(lapply(results,names)))
R = matrix(NA,nrow=length(ints),ncol=length(approaches),dimnames=list(ints,names(approaches)))
for (r in names(results)){
	R[names(results[[r]]),r] = results[[r]]
}
	
write.table(R,sprintf("JSDs-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",quote=FALSE)

# calculate minRanks across the two signalling probability methods
R1 = apply(R,2,rank,ties.method="min",na.last=TRUE)
R1[which(is.na(R))] = NA
minRanks = pmin(R1[,1],R1[,2])

write.table(cbind(minRanks),sprintf("minRanks-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",col.names=FALSE,quote=FALSE)

# save the selected candidates with their rank and associated gene symbols
threshold = max(minRanks,na.rm=TRUE) * 0.06
sel = minRanks[minRanks <= threshold]
x = data.frame(sel,V(graph_str)[gsub("__inh","",names(sel))]$symbol, stringsAsFactors=FALSE)

cat("# Candidate network objects, from MetaCore from Clarivate Analytics\n# '__inh' indicates that the node should be inhibited; activated otherwise\n\n#Network object (with action)\tRank\tGene symbol(s)\n",file=sprintf("candidates-cutoff%s-bestcomb%s.txt",cutoff,best))

write.table(x[order(x$sel),],sprintf("candidates-cutoff%s-bestcomb%s.txt",cutoff,best),sep="\t",col.names=FALSE,quote=FALSE,append=TRUE)

